package org.csdn.titan

import org.apache.commons.lang3.StringUtils
import org.apache.spark.graphx.{Edge, Graph}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Dataset, SparkSession}

/**
 * ID-mapping job: groups identifiers that belong to the same entity.
 *
 * Every input line is a comma-separated list of identifiers observed together
 * (phone number, name, account, ...). Each identifier becomes a graph vertex,
 * consecutive identifiers on one line are linked by an edge, and GraphX
 * connected components assigns every vertex a group id (the smallest vertex
 * id in its component). Each input line is finally emitted prefixed with the
 * group id of its first identifier.
 */
object IDMaping {

  /** Input used when no path is supplied on the command line. */
  private val DefaultInputPath = "data/b_graphdata.txt"

  /**
   * Splits a raw line on commas and drops blank fields.
   *
   * @param line one raw input record
   * @return the non-blank identifiers of the record, in their original order
   */
  private def nonBlankSegments(line: String): Array[String] =
    line.split(",").filter(seg => StringUtils.isNotBlank(seg))

  /**
   * Derives the GraphX vertex id of an identifier.
   *
   * NOTE(review): `String.hashCode` is only 32 bits wide, so two distinct
   * identifiers can collide and would then be merged into one vertex. Kept
   * as-is so that group ids stay stable; consider a 64-bit hash for large
   * data sets.
   */
  private def vertexId(seg: String): Long = seg.hashCode.toLong

  /**
   * Runs the job.
   *
   * @param args optional; `args(0)` overrides the input file path
   *             (defaults to `data/b_graphdata.txt`)
   */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder()
      .appName(this.getClass.getName)
      .master("local[*]")
      .getOrCreate()

    try {
      val inputPath: String = args.headOption.getOrElse(DefaultInputPath)
      val ds: Dataset[String] = spark.read.textFile(inputPath)

      import spark.implicits._

      // Build one graph vertex per identifier: (vertex id, vertex data).
      val vertices: RDD[(Long, String)] = ds.rdd.flatMap { line =>
        nonBlankSegments(line).map(seg => (vertexId(seg), seg))
      }

      // Link consecutive identifiers of the same line. Blank fields are removed
      // BEFORE pairing: otherwise an edge would point at the hash of "" (a vertex
      // that is not in `vertices`), gluing unrelated records together, and a
      // blank field in the middle of a line would split that line in two.
      val edges: RDD[Edge[String]] = ds.rdd.flatMap { line =>
        val segs: Array[String] = nonBlankSegments(line)
        segs.zip(segs.drop(1)).map { case (src, dst) =>
          Edge(vertexId(src), vertexId(dst), "")
        }
      }

      val graph: Graph[String, String] = Graph(vertices, edges)

      // After connectedComponents each vertex carries the smallest vertex id of
      // its component, which serves as the group id.
      val vertices1 = graph.connectedComponents().vertices

      /* Sample (vertex id, group id) pairs:
      (20876169,-754014823)
      (1178496,1178496)
      (101925,-754014823)
      (-577275978,-1704388056)
      (261665248,-1086643525)
      (1034110,-1704388056)
      (1895489036,1178496)
      (530535263,-754014823)
      (-1704388056,-1704388056)
      (20954553,-1086643525)
      (-960203107,-1086643525)
      (-1086643525,-1086643525)
      (755428178,1178496)
      (-754014823,-754014823)
      (1015193742,-1704388056)
      (20892884,1178496)
      (1252357783,-1086643525)
      (21308021,-1704388056)
      (1045114,-1086643525)
      (23297811,-754014823)
      (575291404,1178496)*/

      // The mapping is collected to the driver and broadcast so that every task
      // can look up group ids locally.
      val idmaps = vertices1.collectAsMap()
      val bc = spark.sparkContext.broadcast(idmaps)

      // Prefix every line with the group id of its first identifier. Lines that
      // contain no identifier at all (blank lines) cannot be assigned a group
      // and are skipped instead of crashing the job.
      val result: Dataset[String] = ds.flatMap { line =>
        val groupIds = bc.value
        nonBlankSegments(line).headOption
          .flatMap(seg => groupIds.get(vertexId(seg)))
          .map(gid => s"$gid,$line")
          .toList
      }

      /* Sample output:
      +-------------------------------------------+
      |value                                      |
      +-------------------------------------------+
      |-1704388056,13760883112,冯长春,fcclzydouble|
      |-1704388056,冯长春,fcclzydouble,46210953   |
      |-1704388056,46210953,fcclzydouble,缙捷     |
      |-1086643525,15820284628,刘子英,lzyfcc      |
      |-1086643525,刘子英,lzyfcc,402571231        |
      |-1086643525,402571231,老牛,南方设计院      |
      |-754014823,冯志云,1020426868,fzy           |
      |-754014823,fzy,小云朵,华荟幼儿园           |
      |1178496,冯志远,fengzhiyuan,1580251905      |
      |1178496,1580251905,建筑学校,远远           |
      +-------------------------------------------+*/
      result.show(10, false)
    } finally {
      // Release the local Spark context even when the job fails.
      spark.stop()
    }
  }
}
